#####################################
### Matthew Cebul + Sharan Grewal ###
### Conscription & NVCs           ###
### Merging Code                  ###
#####################################

# Clear every object from the global environment before running.
# NOTE(review): this also wipes anything the caller already had in the session.
rm(list = ls())
library(readxl)
# plyr is attached before tidyverse; packages attached later mask earlier ones,
# so same-named functions resolve to the tidyverse versions below.
library(plyr) 
library(tidyverse)
library(haven)
# Duplicate of the readxl call above.
library(readxl)
library(countrycode)
library(DataCombine)
library(reshape2)
library(margins)
library(naniar)

# All relative paths below are resolved against this directory.
# NOTE(review): machine-specific absolute path -- must be edited to run elsewhere.
setwd("/Users/mcebul/Dropbox/Conscription/Submissions/CPS/Final")
  
#####                    
# Load Data          
#####

# Each component file is read into its own object; paths are relative to the
# working directory set above.

# Campaign data: NAVCO 2.1 (Chenoweth and Shay 2019)
navco <- read_csv(file = "./ComponentData/navco_clean.csv")

# Conscription data (Toronto 2014)
toronto <- read_csv(file = "./ComponentData/toronto_clean.csv")

# National Material Capabilities (COW)
COW <- read_csv(file = "./ComponentData/NMC_5_0.csv")

# Penn World Tables
pwt <- read_csv(file = "./ComponentData/pwt_clean.csv")

# Maddison Project
mpd <- read_csv(file = "./ComponentData/mpd_clean.csv")

# Regime type (Magaloni)
magaloni <- read_csv(file = "./ComponentData/Magaloni.csv")

# World Development Indicators
WDI <- read_csv(file = "./ComponentData/WDI_clean.csv")

# Regions indicator
dosregions <- read_csv(file = "./ComponentData/Regions.csv")

# British colonial origins (Asal + Toronto 2017); Stata file, read with haven
colonial <- read_dta(file = "./ComponentData/AsalConradToronto_JCR2015_Data.dta")

# Rivalries (Thompson Dreyer 2012)
riv <- read_csv(file = "./ComponentData/riv_clean.csv")

# Wars (Reiter Stam & Horowitz 2016)
Wars <- read_csv(file = "./ComponentData/War_clean.csv")

# Civil Liberties Index (Freedom House)
civlib <- read_csv(file = "ComponentData/civlib_clean.csv")

# Physical Integrity Rights (CIRI)
ciri <- read_csv(file = "./ComponentData/CIRI_clean.csv")

# Foreign patronage / client states (Casey)
client <- read_csv(file = "./ComponentData/client_clean.csv")


#####
# Merge 
#####

# Toronto's conscription table is the base. Every other source is left-joined
# onto it (all.x = TRUE), so a row of `data` is never dropped just because a
# source has no matching record; unmatched rows get NA in the added columns.

# NAVCO
data <- merge(toronto, navco, by = c("ccode", "year"), all.x = TRUE)
# COW military variables
data <- merge(data, COW, by = c("ccode", "year"), all.x = TRUE)
# British colonial history
data <- merge(data, colonial[c("ccode", "year", "BRITISHCOLONY")], by = c("ccode", "year"), all.x = TRUE)
# Penn World Tables (keyed on country name rather than ccode)
data <- merge(data, pwt[c("country", "year", "rgdppc", "rgdpna", "rgdppclag", "pop")], by = c("country", "year"), all.x = TRUE)
# Maddison Project (keyed on country name rather than ccode)
data <- merge(data, mpd[c("country", "year", "rgdpnapc", "gdppclag")], by = c("country", "year"), all.x = TRUE)
# Magaloni regime type
data <- merge(data, magaloni[c("ccode", "year", "demo_r", "demo_nr", "regime_r", "regime_nr")], by = c("ccode", "year"), all.x = TRUE)
# World Development Indicators
data <- merge(data, WDI[c("ccode", "year", "NetUsers", "Mobile", "youth")], by = c("ccode", "year"), all.x = TRUE)
# Rivalries
data <- merge(data, riv[c("ccode", "year", "Rivalry")], by = c("ccode", "year"), all.x = TRUE)
# Wars
data <- merge(data, Wars, by = c("ccode", "year"), all.x = TRUE)
# Regions indicator (keyed on country only).
# This call previously passed `type = "'eft"`, which is not a merge() argument:
# it was silently ignored and the join ran as an inner join, dropping every row
# whose country is absent from the regions file. all.x = TRUE keeps those rows
# with region = NA, consistent with the other merges.
data <- merge(data, dosregions[c("country", "region")], by = "country", all.x = TRUE)
# Civil Liberties
data <- merge(data, civlib[c("ccode", "year", "CL")], by = c("ccode", "year"), all.x = TRUE)
# Physical Integrity Rights
data <- merge(data, ciri[c("ccode", "year", "PHYSINT")], by = c("ccode", "year"), all.x = TRUE)
# Client Regimes
data <- merge(data, client[c("ccode", "year", "spons", "ally")], by = c("ccode", "year"), all.x = TRUE)


###
# Remove Unnecessary Columns
###

# Columns carried forward into the analysis file, grouped by source.
# Anything not listed here is dropped.
keep_cols <- c(
  # identifiers and conscription
  "ccode", "year", "country", "cabbr", "recruit", "mil",
  # NAVCO campaign fields
  "camp_name", "camp_size", "camp_size_cat", "total_part", "start_date",
  "end_date", "status", "location", "cyear", "prim_meth", "camp_goals",
  "repression", "camp_support", "sec_defect", "success",
  # military capabilities and population
  "milex", "milper", "tpop", "upop",
  # colonial history and GDP series
  "BRITISHCOLONY", "rgdppc", "rgdpna", "rgdppclag", "rgdpnapc", "gdppclag",
  # regime type
  "demo_r", "demo_nr", "regime_r", "regime_nr",
  # remaining covariates
  "NetUsers", "Mobile", "youth", "region", "Rivalry", "War", "CL", "PHYSINT",
  "spons", "ally"
)
data <- data[keep_cols]


######################
## DEFINE VARIABLES ##
######################


#####
# NV Campaign Variables
#####

# All three indicators below share the same scope filter: camp_goals coded 0-4
# (excluding anti-occupation movements). Rows with no matched NAVCO record have
# NA in the campaign fields, so ifelse() returns NA there; those are set to 0.

# NV CAMPAIGN ONSET: cyear == 0 and prim_meth == 1
data$NVonset <- ifelse((data$camp_goals >= 0 & data$camp_goals <= 4) & data$cyear == 0 & data$prim_meth == 1, 1, 0)
data$NVonset[is.na(data$NVonset)] <- 0 # NA years created merging NAVCO into Toronto become 0s

# NV CAMPAIGN ONSET, COUNT: number of onsets in the same ccode-year, kept only
# on the onset rows themselves (0 everywhere else).
# ungroup() is required here: without it `data` stays a grouped tibble and every
# later `data$x <- ...` assignment (including per-row work) runs on grouped data.
data <- data %>%
  group_by(ccode, year) %>%
  mutate(NVOnsetSum = sum(NVonset)) %>%
  ungroup()
data$NVOnsetSum[data$NVonset != 1] <- 0

# NV CAMPAIGN, ANY STAGE: cyear between 0 and 2 and prim_meth == 1
data$NVCamp <- ifelse((data$camp_goals >= 0 & data$camp_goals <= 4) & (data$cyear >= 0 & data$cyear <= 2) & data$prim_meth == 1, 1, 0)
data$NVCamp[is.na(data$NVCamp)] <- 0 # NA years created merging NAVCO into Toronto become 0s

# VIO CAMPAIGN ONSET: cyear == 0 and prim_meth == 0
data$VioOnset <- ifelse((data$camp_goals >= 0 & data$camp_goals <= 4) & data$cyear == 0 & data$prim_meth == 0, 1, 0)
data$VioOnset[is.na(data$VioOnset)] <- 0 # NA years created merging NAVCO into Toronto become 0s

# CAMPAIGN SIZE
# Size, Categorical (excluding anti-occupation movements):
# camp_size shifted up by one for campaigns with camp_goals 0-4, otherwise 0.
goal_in_scope <- data$camp_goals >= 0 & data$camp_goals <= 4
size_cat <- ifelse(goal_in_scope, data$camp_size + 1, 0)
size_cat[size_cat == -98] <- NA # the -99 missing code after the +1 shift
size_cat[is.na(data$camp_name)] <- 0 # size zero for country-years without a campaign
data$Size1 <- size_cat
data$sizeNV <- ifelse(data$NVCamp == 1, size_cat, 0) # NV campaigns only

# Size, Total Participation Per Year:
# use total_part when it is recorded; when it is coded -99, fall back to a
# participation figure looked up from the size category (0 through 6). Any
# other category, or a missing one, yields NA.
part_by_cat <- c(0, 100, 1000, 10000, 100000, 500000, 1000000)
imputed_part <- part_by_cat[match(size_cat, 0:6)]
data$Size2 <- ifelse(data$total_part != -99, data$total_part, imputed_part)
data$Size2 <- log1p(data$Size2) # log it

# CAMPAIGN DURATION (days)
data$Duration <- as.numeric(difftime(data$end_date, data$start_date, units = "days"))

# For each position i, sum `values` over every row that shares row i's `unit`
# and has a strictly earlier `time`. NA values are skipped, so a row with no
# earlier record gets 0. Works on plain vectors (no per-row data-frame
# reassignment) and returns numeric(0) for empty input, which a
# `for (i in 1:nrow(data))` loop does not handle.
sum_before <- function(values, unit, time) {
  vapply(
    seq_along(values),
    function(i) sum(values[unit == unit[i] & time < time[i]], na.rm = TRUE),
    numeric(1)
  )
}

# PAST SUCCESSFUL NAVCO CAMPAIGN
# Running total of `success` in the same ccode over all earlier years.
data$Past_success <- sum_before(data$success, data$ccode, data$year)

# PAST DEFECTIONS AGAINST NAVCO CAMPAIGNS
# sec_defect uses -99 as its missing code; recode to NA before summing.
data$defect <- ifelse(data$sec_defect == "-99", NA, as.numeric(data$sec_defect))
data$Past_defect <- sum_before(data$defect, data$ccode, data$year)

#####
# Conscription
#####

# Conscription indicator built from `recruit`: recruit == 0 maps to 1,
# recruit == 1 maps to 0, anything else to NA.
data$conscript <- ifelse(data$recruit == 0, 1,
                         ifelse(data$recruit == 1, 0, NA)) # 0 if volunteer, 1 if conscript
# Fix the missing value for Georgia. Only rows that are actually NA are filled;
# assigning to every Georgia row would also overwrite any year coded as
# volunteer, which is not what this correction is meant to do.
georgia_gap <- which(is.na(data$conscript) & data$country == "Georgia")
data$conscript[georgia_gap] <- 1
data$conscript <- factor(data$conscript)

#####
# Control Variables
#####

# GDP PER CAPITA (Maddison series first, PWT used where Maddison is missing)
# Not lagged
gdp_level <- data$rgdpnapc
level_gap <- is.na(gdp_level)
gdp_level[level_gap] <- data$rgdppc[level_gap]
data$gdpcomb <- gdp_level

# Lagged
gdp_lagged <- data$gdppclag
lag_gap <- is.na(gdp_lagged)
gdp_lagged[lag_gap] <- data$rgdppclag[lag_gap]
data$gdplagcomb <- gdp_lagged

# Logged and lagged
data$gdploglag <- log(gdp_lagged)

# GDP CHANGE (Maddison series first, PWT used where Maddison is missing)
# Percentage change from the lagged to the current value.
pct_change <- function(now, before) {
  ((now - before) / before) * 100
}
gdp_growth <- pct_change(data$rgdpnapc, data$gdppclag)
growth_gap <- is.na(gdp_growth)
gdp_growth[growth_gap] <- pct_change(data$rgdppc, data$rgdppclag)[growth_gap]
data$chgdpcomb <- gdp_growth

# MIL. SIZE (NMC, thousands of military personnel)
# -9 is treated as a missing code and recoded to NA.
data$milper <- replace(data$milper, data$milper == "-9", NA)
# Personnel relative to total population, log(1 + x)
data$milperper <- log1p(data$milper / data$tpop)

# MIL. SPENDING (NMC)
# -9 is treated as a missing code and recoded to NA.
data$milex <- replace(data$milex, data$milex == "-9", NA)
data$logmilex <- log(data$milex + 1)
# Spending per soldier, logged; defined as 0 where there are no personnel
# (avoids dividing by zero).
spend_per_soldier <- log((data$milex / data$milper) + 1)
spend_per_soldier[which(data$milper == 0)] <- 0
data$logmilex2 <- spend_per_soldier

# MAGALONI REGIME TYPE
# Factor version of regime_r, plus a 0/1 flag for the "Military" category
# (NA where regime_r is missing).
regime <- data$regime_r
data$regimetype <- as.factor(regime)
data$milregime <- as.numeric(regime == "Military")

# POPULATION (logged)
data$tpoplog <- log(data$tpop)

# URBANIZATION (NMC, % of population in cities > 100k)
# Ratio of urban to total population, capped at 1.
urban_share <- data$upop / data$tpop
urban_share[which(urban_share > 1)] <- 1
data$urban <- urban_share

# MOBILE (logged, log(1 + x))
data$Mobilelog <- log1p(data$Mobile)

# DIFFUSION (logged)
# Number of nonviolent campaign onsets (NVonset) recorded in the same region
# and year, excluding the row's own onset.
# ungroup() keeps the year/region grouping from leaking into later steps.
data <- data %>%
  group_by(year, region) %>%
  mutate(diffuse1 = sum(NVonset)) %>%
  ungroup()
data$diffuse1 <- data$diffuse1 - data$NVonset # NAVCO, NV Onsets Only
# Rows with no region would otherwise be pooled together as if they formed one
# region; their regional count is unknown, so record it as NA.
data$diffuse1[is.na(data$region)] <- NA
data$diffuselog <- log1p(data$diffuse1) # logged

# BRITISH COLONIAL ORIGINS

# Fix Missing Values: hand-coded values for countries listed below.
brit_col_yes <- c(
  "Yemen Arab Republic", "Yemen People's Republic", "Tonga", "Swaziland",
  "St. Vincent and the Grenadines", "St. Lucia", "St. Kitts and Nevis",
  "Grenada", "Dominica", "Bhutan"
)
brit_col_no <- c(
  "West Germany", "Slovenia", "Republic of Vietnam", "Montenegro",
  "Guinea", "German Democratic Republic", "East Timor", "Czechoslovakia"
)
data$BRITISHCOLONY[data$country %in% brit_col_yes] <- 1
data$BRITISHCOLONY[data$country %in% brit_col_no] <- 0

# Within one country: for years from 2001 on, carry the year-2000 value forward.
# When the country has no non-missing year-2000 value (for example it has no
# row for 2000 at all), keep each row's own value instead. Indexing an empty
# year-2000 slice inside ifelse() would recycle it to NA and throw away the
# hand-coded values assigned above.
carry_2000_forward <- function(value, year) {
  base <- value[which(year == 2000)]
  base <- base[!is.na(base)]
  ifelse(year >= 2001 & length(base) > 0, base[1], value)
}

# Update Toronto's data through 2012
# NOTE: `data` is left grouped by country after this step, as before.
data <- data %>%
  group_by(country) %>%
  mutate(BritCol = carry_2000_forward(BRITISHCOLONY, year))

# CLIENT REGIMES (autocratic regimes only)
# Missing sponsor values created by the merge become 0, but only through 2010:
# the client data are right-censored at 2010, so later years stay NA.
no_sponsor_record <- is.na(data$spons) & data$year <= 2010
data$spons[no_sponsor_record] <- 0

# REPRESSION
# "PHYSINT" = CIRI Physical Integrity Rights index. 0-8 scale, higher values = better civil liberties.
# The `repression` column uses -99 as a missing code; recode it to NA.
data$repression <- replace(data$repression, data$repression == -99, NA)



# Write the merged dataset to the working directory in two formats.
write_rds(data, "LargeN_clean.rds")
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
write.csv(data, "LargeN_clean.csv", row.names = FALSE)


